from playwright.sync_api import sync_playwright

# Target URL
url = "http://bm.scs.gov.cn/pp/gkweb/core/web/ui/business/article/articlegroup.html?id=0000000062b7b2b60162bccd55ec0006"
# Alternative target URL (currently disabled):
# url = "https://open-live.bilibili.com/document/849b924b-b421-8586-3e5e-765a72ec3840#h3-u6709u54EAu4E9Bu4E92u52A8u65B9u5F0F"
def get_page_content(page_url):
    """Open *page_url* in Chromium and return the text of its ``<body>``.

    The page title and the extracted body text are printed to stdout as a
    side effect.

    Args:
        page_url: Address of the page to load.

    Returns:
        The ``inner_text()`` of the ``<body>`` element, or an empty string
        when no ``<body>`` element is found or its text is empty.

    Note:
        Errors raised by Playwright while navigating or waiting (for example
        timeouts) are not caught here and propagate to the caller; the
        browser is still closed in that case.
    """
    with sync_playwright() as p:
        # Launch in headed (non-headless) mode so the run can be observed.
        browser = p.chromium.launch(headless=False)
        try:
            page = browser.new_page()

            # Navigate to the target page.
            page.goto(page_url)

            # Wait until the page reaches the "networkidle" load state.
            page.wait_for_load_state("networkidle")

            # Report the page title.
            title = page.title()
            print(f"网页标题: {title}")

            # Extract the text content of the <body> element.
            body_element = page.query_selector("body")
            if body_element is None:
                print("未找到 <body> 标签。")
                return ""

            body_text = body_element.inner_text() or ""
            print(f"网页正文内容: {body_text}")
            return body_text
        finally:
            # Close the browser on every exit path, including exceptions
            # raised by navigation, waiting, or text extraction.
            browser.close()

# # Call the function
# body_text = get_page_content(url)
# with open('test.txt','w',encoding='utf-8') as f:
#     f.write(body_text)
